################################################################################-
# Replication File for Wratil, Wäckerle and Proksch: Government Rhetoric and the 
# Representation of Public Opinion in International Negotiations
#
# This script runs the robustness tests for the STM in Appendix J. 
# Many parts of this script take a significant amount of time.
# These calculations can be skipped by leaving the settings below at "no".
# If you want to include these steps, switch the respective settings to "yes".
# The analyses in Appendix J are run with 2,500 simulations, while the main analysis
# was run with 10,000. This can be changed in the settings (n_simulation).
#
# Make sure you have the packages reshape, xtable and ggpubr installed.
#
# Additionally, the script produces the following graphs and tables:
# Figure J1 through J13
# Table J1 through J7 (except for Table J2)
################################################################################-

#### Set Up and Load Data ####
library(stm)                #version 1.3.6
library(tidyverse)          #version 1.3.2
library(quanteda)           #version 3.2.1
library(quanteda.textstats) #version 0.95
library(reshape)            #version 0.8.9

# Shared ggplot2 theme added to the coefficient plots below: bold title,
# no legend, black axis text, no tick marks, blank panel background and
# horizontal bold facet strip labels on a white strip.
theme_interaction <- theme(
  plot.title       = element_text(face = "bold"),
  legend.position  = "none",
  axis.title       = element_text(size = 8, colour = "black"),
  axis.text        = element_text(colour = "black"),
  axis.ticks       = element_blank(),
  strip.text.y     = element_text(angle = 0, face = "bold"),
  strip.background = element_rect(fill = "white"),
  panel.background = element_blank(),
  panel.spacing.x  = unit(0, "line")
)

# Project helper functions. estimateEffect_cluster() and
# calc_interaction_regression_stm() are called below but not defined in this
# script; presumably they are defined in this file (verify).
source("99_functions.R")

# Restores saved objects into the workspace. `data_dfm`, the starting point of
# every robustness test below, is expected to be among them.
load("generated_data/dfm_for_stm.RData")

#### Settings ####

# Number of simulations, passed as `nsims` to every estimateEffect_cluster() call.
n_simulation <- 2500

# Switches for the expensive steps of each robustness test. Every switch is
# compared against the string "yes": with "yes" the step is computed and its
# result saved to generated_data/; with any other value the previously saved
# result is loaded from generated_data/ instead.
#   *_stm            : fit the structural topic model
#   *_estimateEffect : run estimateEffect_cluster() on the fitted model
#   *_regressions    : compute the interaction regression tables

# Robustness Test 1: year fixed effects
run_r1_year_fe_stm            <- "no"
run_r1_year_fe_estimateEffect <- "no"
run_r1_year_fe_regressions    <- "no"

# Robustness Test 2: combined CHES and CMP measure
run_r2_ches_stm            <- "no"
run_r2_ches_estimateEffect <- "no"
run_r2_ches_regressions    <- "no"

# Robustness Test 3: 35 topics
run_r3_35_stm            <- "no"
run_r3_35_estimateEffect <- "no"
run_r3_35_regressions    <- "no"

# Robustness Test 4 (used further down in the script)
run_r4_45_stm            <- "no"
run_r4_45_estimateEffect <- "no"
run_r4_45_regressions    <- "no"

# Robustness Test 5 (used further down in the script)
run_r5_emu_stm            <- "no"
run_r5_emu_estimateEffect <- "no"
run_r5_emu_regressions    <- "no"

# Robustness Test 6 (used further down in the script)
run_r6_lr_stm            <- "no"
run_r6_lr_estimateEffect <- "no"
run_r6_lr_regressions    <- "no"

###############################################################################-
#### Robustness Test 1: Year Fixed Effects ####

# Work on a copy of the baseline dfm. Its docvars are cut down to the
# variables listed here, and `year` becomes a factor whose first (reference)
# level is 2013.
data_dfm_r1 <- data_dfm
docvars(data_dfm_r1) <- data.frame(docvars(data_dfm_r1)) %>%
  select(
    gov_eu_supporter, image_lag6m_scaled, date_correct,
    part_of_speech, text_copy, Actor,
    gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
    unemployment_scaled, inflation_scaled, north_south,
    Negotiation_Stage, Council_Config_final, year
  ) %>%
  mutate(year = fct_relevel(as.factor(year), "2013"))

# Drop every document with a missing value in any of the retained docvars.
data_dfm_r1 <- dfm_subset(data_dfm_r1, complete.cases(docvars(data_dfm_r1)))

# 40-topic STM with year fixed effects added to the prevalence formula.
# Unless a re-fit is requested in the settings, the saved model is loaded.
if (run_r1_year_fe_stm != "yes") {
  load("generated_data/stm_out_r1_year_fe.RData")
} else {
  set.seed(1711)
  stm_out_40_year_fe <- stm(
    documents = data_dfm_r1,
    K = 40,
    prevalence = ~ gov_eu_supporter * image_lag6m_scaled +
      part_of_speech +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled + budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final + year,
    data = docvars(data_dfm_r1),
    init.type = "Spectral"
  )

  save(stm_out_40_year_fe, file = "generated_data/stm_out_r1_year_fe.RData")
}

#### ...Frex Similarity ####

# For each topic, collapse its 20 FREX words into one space-separated string
# (column `complete`) so that every topic can be treated as a short document.
# paste() is vectorised over rows, which replaces the former `1:nrow()` loop
# (that idiom iterates over c(1, 0) when a table has no rows).

# Baseline model: the FREX words sit in columns 3 to 22 of the saved CSV.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- do.call(
  paste,
  c(unname(as.list(frex_main[, 3:22])), sep = " ")
)

# Year fixed effects model: labelTopics() returns the FREX words as a
# topic-by-word matrix, so the words are in columns 1 to 20. `X` is the topic id.
frex_r1 <- stm::labelTopics(stm_out_40_year_fe, n = 20)[["frex"]] %>%
  data.frame() %>%
  mutate(X = 1:40)
frex_r1$complete <- do.call(
  paste,
  c(unname(as.list(frex_r1[, 1:20])), sep = " ")
)

# Stack the FREX strings of both models: rows 1-40 are the baseline topics,
# rows 41-80 the topics of the year fixed effects model.
all_words_r1 <- bind_rows(
  frex_main %>%
    select(X, complete) %>%
    mutate(model = "main"),
  frex_r1 %>%
    select(X, complete) %>%
    mutate(model = "year")
)

# One "document" per topic, then pairwise cosine similarity between documents.
dfmat_r1 <- all_words_r1 %>%
  corpus(text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2_r1 <- textstat_simil(dfmat_r1, margin = "documents", method = "cosine")

# Long format, one row per ordered pair of distinct topics. Document names
# "text<k>" are turned back into numbers; partners 41-80 are tagged "year" and
# renumbered to 1-40, all other partners get NA in both columns. Only pairs
# whose first member is a baseline topic (1-40) are kept.
tstat2_pw_r1 <- tstat2_r1 %>%
  as.matrix() %>%
  melt() %>%
  filter(!is.na(value), X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text")),
    robustness_test = if_else(comptext %in% 41:80, "year", NA_character_),
    comptext = if_else(robustness_test == "year", comptext - 40, NA_real_)
  ) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Heatmap of cosine similarities: baseline topics on the x axis, topics of the
# year fixed effects model on the y axis. Only baseline-vs-robustness pairs
# are drawn.
# NOTE(review): midpoint and limits of the colour scale are computed on the
# whole of `tstat2_pw_r1`, which also contains the baseline-vs-baseline pairs
# that are filtered out of the plot — confirm this is intended.
tstat2_pw_r1 %>%
  filter(!is.na(robustness_test)) %>%
  ggplot(aes(x = reorder(as.character(basetext), basetext),
             y = reorder(comptext, comptext),
             fill = value)) +
  geom_tile(color = "white") +
  labs(x = "Topic in Baseline Model", y = "Topic in Year Fixed Effects Model") +
  scale_fill_gradient2(low = "blue", high = "red",
                       midpoint = median(tstat2_pw_r1$value),
                       # `limits` is spelled out in full; the former `limit =`
                       # only worked through partial argument matching.
                       limits = range(tstat2_pw_r1$value),
                       space = "Lab",
                       name = "Cosine Similarity") +
  coord_fixed() +
  theme(axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.title = element_text(size = 12))

# This is Figure J1
ggsave("figures_appendix/figure_j_1.eps", width = 7, height = 7, units = "in")

#### ...estimateEffect ####

# Effect estimation for topics 12, 13, 22, 32, 38 and 40 of the year fixed
# effects model, using the same covariates as its prevalence formula.
# Unless a re-run is requested in the settings, the saved result is loaded.
if (run_r1_year_fe_estimateEffect != "yes") {
  load("generated_data/prep_council_r1_year_fe.RData")
} else {
  prep_council_40_year_fe <- estimateEffect_cluster(
    c(12, 13, 22, 32, 38, 40) ~ gov_eu_supporter * image_lag6m_scaled +
      part_of_speech +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled + budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final + year,
    stm_out_40_year_fe,
    nsims = n_simulation,
    meta = docvars(data_dfm_r1),
    uncertainty = "Global"
  )

  save(prep_council_40_year_fe, file = "generated_data/prep_council_r1_year_fe.RData")
}

#### ...regressions ####

# Document-level covariates of the estimation sample, stored in the global
# `data.sum`.
# NOTE(review): nothing in this script reads `data.sum`; presumably
# calc_interaction_regression_stm() picks it up from the workspace — confirm.
data.sum <- docvars(data_dfm_r1) %>%
  data.frame()

if (run_r1_year_fe_regressions == "yes") {

  # Topics of interest (names) and their labels (values). One lookup replaces
  # six copy-pasted calls, so topic numbers and labels cannot drift apart.
  topic_labels_r1 <- c(
    "12" = "Delaying agreement",
    "13" = "Formulating a demand",
    "22" = "Supporting the compromise",
    "32" = "More technical-level discussion needed",
    "38" = "Cautious language",
    "40" = "Raising a concern"
  )

  # One regression table per topic, in the order of `topic_labels_r1`.
  # The per-topic tables now live in this list instead of separate
  # tab_<topic>_r1 objects.
  tabs_r1 <- lapply(
    as.numeric(names(topic_labels_r1)),
    function(topic_id) {
      calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                      var_mod = "gov_eu_supporter",
                                      topic = topic_id,
                                      estimate_object = prep_council_40_year_fe)
    }
  )

  # Each table is expected to contribute two rows (baseline and interaction),
  # hence every label is repeated twice. Bounds are estimate +/- 1.96 SE.
  results_all_r1 <- bind_rows(tabs_r1) %>%
    mutate(Topic_name = rep(unname(topic_labels_r1), each = 2)) %>%
    mutate(upper = Estimate + 1.96 * Std.Error,
           lower = Estimate - 1.96 * Std.Error)

  # Human-readable names for the two moderator levels.
  results_all_r1$moderator_level <- results_all_r1$moderator_level %>%
    recode("baseline" = "Pro-EU Government",
           "gov_eu_supporterEurosceptic Government:image_lag6m_scaled" = "Eurosceptic Government")

  save(file = "generated_data/regression_results_r1.RData", results_all_r1)
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_r1.RData")
}

# This is Table J1
# The confidence bounds are dropped and the columns get presentation names
# before the table is printed as LaTeX with five digits.
results_all_r1 %>%
  select(-upper, -lower) %>%
  dplyr::rename(`Government Ideology` = "moderator_level",
                `Topic Label` = "Topic_name",
                `t-value` = "tval",
                `p-value` = "p") %>%
  xtable::xtable(digits = 5)

# Plot data shared by both panels: columns renamed for plotting and every
# topic assigned to the hypothesis it belongs to (used as facet rows).
coef_data_r1 <- results_all_r1 %>%
  dplyr::rename(term = "Topic_name",
                model = "moderator_level",
                estimate = "Estimate",
                std.error = "Std.Error") %>%
  mutate(feature = as.factor(case_when(
    term %in% c("Supporting the compromise") ~ "Hypothesis 2",
    term %in% c("Delaying agreement", "More technical-level discussion needed", "Cautious language") ~ "Hypothesis 3",
    term %in% c("Formulating a demand", "Raising a concern") ~ "Hypothesis 4"
  )))

# One coefficient plot (point estimate with interval) per government type.
# The two panels differ only in the rows they keep, so they are built by the
# same code; the pro-EU panel is built first, the Eurosceptic panel second.
panels_r1 <- lapply(
  c("Pro-EU Government", "Eurosceptic Government"),
  function(gov_type) {
    coef_data_r1 %>%
      filter(model == gov_type) %>%
      ggplot(aes(x = estimate, xmin = lower, xmax = upper, y = term, color = model)) +
      geom_point() +
      geom_errorbarh(height = 0) +
      facet_grid(feature ~ model, scales = "free_y", space = "free") +
      theme_bw() +
      geom_vline(xintercept = 0, lty = "dashed") +
      scale_color_manual(values = c("black", "black")) +
      xlab("Effect of public image on expected topic proportion") +
      ylab("") +
      ggtitle("") +
      theme_interaction
  }
)
europhiles_r1 <- panels_r1[[1]]
eurosceptics_r1 <- panels_r1[[2]]

# This is Figure J2
ggpubr::ggarrange(eurosceptics_r1, europhiles_r1, ncol = 1)
ggsave("figures_appendix/figure_j_2.eps", width = 6, height = 6, units = "in")

###############################################################################-
#### Robustness Test 2: Combined CHES and CMP ####

# Work on a copy of the baseline dfm whose docvars are cut down to the
# variables listed here; `ches_cmp_comb` replaces `gov_eu_supporter`.
data_dfm_r2 <- data_dfm
docvars(data_dfm_r2) <- data.frame(docvars(data_dfm_r2)) %>%
  select(
    image_lag6m_scaled,
    part_of_speech, text_copy, Actor, ches_cmp_comb,
    gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
    unemployment_scaled, inflation_scaled, north_south,
    Negotiation_Stage, Council_Config_final, year
  )

# Drop every document with a missing value in any of the retained docvars.
data_dfm_r2 <- dfm_subset(data_dfm_r2, complete.cases(docvars(data_dfm_r2)))

# 40-topic STM in which `ches_cmp_comb` takes the place of `gov_eu_supporter`
# in the interaction with public image.
# Unless a re-fit is requested in the settings, the saved model is loaded.
if (run_r2_ches_stm != "yes") {
  load("generated_data/stm_out_r2_ches_cmp.RData")
} else {
  set.seed(1711)
  stm_out_40_ches_cmp <- stm(
    documents = data_dfm_r2,
    K = 40,
    prevalence = ~ ches_cmp_comb * image_lag6m_scaled +
      part_of_speech +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled + budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    data = docvars(data_dfm_r2),
    init.type = "Spectral"
  )
  save(stm_out_40_ches_cmp, file = "generated_data/stm_out_r2_ches_cmp.RData")
}

#### ...Frex Similarity ####

# For each topic, collapse its 20 FREX words into one space-separated string
# (column `complete`) so that every topic can be treated as a short document.
# paste() is vectorised over rows, which replaces the former `1:nrow()` loop
# (that idiom iterates over c(1, 0) when a table has no rows).

# Baseline model: the FREX words sit in columns 3 to 22 of the saved CSV.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- do.call(
  paste,
  c(unname(as.list(frex_main[, 3:22])), sep = " ")
)

# CHES-CMP model: labelTopics() returns the FREX words as a topic-by-word
# matrix, so the words are in columns 1 to 20. `X` is the topic id.
frex_r2 <- stm::labelTopics(stm_out_40_ches_cmp, n = 20)[["frex"]] %>%
  data.frame() %>%
  mutate(X = 1:40)
frex_r2$complete <- do.call(
  paste,
  c(unname(as.list(frex_r2[, 1:20])), sep = " ")
)

# Stack the FREX strings of both models: rows 1-40 are the baseline topics,
# rows 41-80 the topics of the CHES-CMP model.
all_words <- bind_rows(
  frex_main %>%
    select(X, complete) %>%
    mutate(model = "main"),
  frex_r2 %>%
    select(X, complete) %>%
    mutate(model = "cmp_ches")
)

# One "document" per topic, then pairwise cosine similarity between documents.
dfmat <- all_words %>%
  corpus(text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2 <- textstat_simil(dfmat, margin = "documents", method = "cosine")

# Long format, one row per ordered pair of distinct topics. Document names
# "text<k>" are turned back into numbers; partners 41-80 are tagged "cmp_ches"
# and renumbered to 1-40, all other partners get NA in both columns. Only
# pairs whose first member is a baseline topic (1-40) are kept.
tstat2_pw <- tstat2 %>%
  as.matrix() %>%
  melt() %>%
  filter(!is.na(value), X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text")),
    robustness_test = if_else(comptext %in% 41:80, "cmp_ches", NA_character_),
    comptext = if_else(robustness_test == "cmp_ches", comptext - 40, NA_real_)
  ) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Heatmap of cosine similarities: baseline topics on the x axis, topics of the
# CHES-CMP model on the y axis. Only baseline-vs-robustness pairs are drawn.
# NOTE(review): midpoint and limits of the colour scale are computed on the
# whole of `tstat2_pw`, which also contains the baseline-vs-baseline pairs
# that are filtered out of the plot — confirm this is intended.
tstat2_pw %>%
  filter(!is.na(robustness_test)) %>%
  ggplot(aes(x = reorder(as.character(basetext), basetext),
             y = reorder(comptext, comptext),
             fill = value)) +
  geom_tile(color = "white") +
  labs(x = "Topic in Baseline Model", y = "Topic in CMP-CHES Topics Model") +
  scale_fill_gradient2(low = "blue", high = "red",
                       midpoint = median(tstat2_pw$value),
                       # `limits` is spelled out in full; the former `limit =`
                       # only worked through partial argument matching.
                       limits = range(tstat2_pw$value),
                       space = "Lab",
                       name = "Cosine Similarity") +
  coord_fixed() +
  theme(axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.title = element_text(size = 12))

# This is Figure J3
ggsave("figures_appendix/figure_j_3.eps", width = 7, height = 7, units = "in")

# For each baseline topic of interest, show the most similar topic of the
# CHES-CMP model. Baseline-vs-baseline pairs (robustness_test is NA, and so is
# their comptext) are excluded first; otherwise another baseline topic could
# win the maximum and be reported without a usable topic number.
# `basetext` is numeric, so it is compared against numbers, not strings.
tstat2_pw %>%
  filter(!is.na(robustness_test)) %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40)) %>%
  group_by(basetext) %>%
  filter(value == max(value)) %>%
  ungroup() %>%
  arrange(basetext)

#### ...estimateEffect ####

# Effect estimation for topics 12, 13, 22, 32, 38 and 40 of the CHES-CMP model.
# Unless a re-run is requested in the settings, the saved result is loaded.
# NOTE(review): `north_south` is part of the prevalence formula of
# stm_out_40_ches_cmp but does not appear in this formula — confirm whether
# the omission is intended before changing it (it affects Table J3 / Figure J4).
if (run_r2_ches_estimateEffect != "yes") {
  load("generated_data/prep_council_r2_ches_cmp.RData")
} else {
  prep_council_40_ches_cmp <- estimateEffect_cluster(
    c(12, 13, 22, 32, 38, 40) ~ ches_cmp_comb * image_lag6m_scaled +
      part_of_speech +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled + budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled +
      Negotiation_Stage + Council_Config_final,
    stm_out_40_ches_cmp,
    nsims = n_simulation,
    meta = docvars(data_dfm_r2),
    uncertainty = "Global"
  )

  save(prep_council_40_ches_cmp, file = "generated_data/prep_council_r2_ches_cmp.RData")
}

#### ...regressions ####

# Document-level covariates of the estimation sample, stored in the global
# `data.sum`. It is assigned once here; the identical second assignment that
# used to sit inside the branch below was redundant.
# NOTE(review): nothing in this script reads `data.sum`; presumably
# calc_interaction_regression_stm() picks it up from the workspace — confirm.
data.sum <- docvars(data_dfm_r2) %>%
  data.frame()

if (run_r2_ches_regressions == "yes") {

  # Topics of interest (names) and their labels (values). One lookup replaces
  # six copy-pasted calls, so topic numbers and labels cannot drift apart.
  topic_labels_r2 <- c(
    "12" = "Delaying agreement",
    "13" = "Formulating a demand",
    "22" = "Supporting the compromise",
    "32" = "More technical-level discussion needed",
    "38" = "Cautious language",
    "40" = "Raising a concern"
  )

  # One regression table per topic, in the order of `topic_labels_r2`.
  # The per-topic tables now live in this list instead of separate
  # tab_<topic> objects.
  tabs_r2 <- lapply(
    as.numeric(names(topic_labels_r2)),
    function(topic_id) {
      calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                      var_mod = "ches_cmp_comb",
                                      topic = topic_id,
                                      estimate_object = prep_council_40_ches_cmp)
    }
  )

  # Each table is expected to contribute two rows (baseline and interaction),
  # hence every label is repeated twice. Bounds are estimate +/- 1.96 SE.
  results_all <- bind_rows(tabs_r2) %>%
    mutate(Topic_name = rep(unname(topic_labels_r2), each = 2)) %>%
    mutate(upper = Estimate + 1.96 * Std.Error,
           lower = Estimate - 1.96 * Std.Error)

  # Human-readable names for the two moderator levels.
  results_all$moderator_level <- results_all$moderator_level %>%
    recode("baseline" = "Pro-EU Government",
           "ches_cmp_combEurosceptic Government:image_lag6m_scaled" = "Eurosceptic Government")

  results_all_r2 <- results_all
  save(file = "generated_data/regression_results_r2.RData", results_all_r2)
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_r2.RData")
}

# This is Table J3
# The confidence bounds are dropped and the columns get presentation names
# before the table is printed as LaTeX with five digits.
results_all_r2 %>%
  select(-upper, -lower) %>%
  dplyr::rename(`Government Ideology` = "moderator_level",
                `Topic Label` = "Topic_name",
                `t-value` = "tval",
                `p-value` = "p") %>%
  xtable::xtable(digits = 5)

# Plot data shared by both panels: columns renamed for plotting and every
# topic assigned to the hypothesis it belongs to (used as facet rows).
coef_data_r2 <- results_all_r2 %>%
  dplyr::rename(term = "Topic_name",
                model = "moderator_level",
                estimate = "Estimate",
                std.error = "Std.Error") %>%
  mutate(feature = as.factor(case_when(
    term %in% c("Supporting the compromise") ~ "Hypothesis 2",
    term %in% c("Delaying agreement", "More technical-level discussion needed", "Cautious language") ~ "Hypothesis 3",
    term %in% c("Formulating a demand", "Raising a concern") ~ "Hypothesis 4"
  )))

# One coefficient plot (point estimate with interval) per government type.
# The two panels differ only in the rows they keep, so they are built by the
# same code; the pro-EU panel is built first, the Eurosceptic panel second.
panels_r2 <- lapply(
  c("Pro-EU Government", "Eurosceptic Government"),
  function(gov_type) {
    coef_data_r2 %>%
      filter(model == gov_type) %>%
      ggplot(aes(x = estimate, xmin = lower, xmax = upper, y = term, color = model)) +
      geom_point() +
      geom_errorbarh(height = 0) +
      facet_grid(feature ~ model, scales = "free_y", space = "free") +
      theme_bw() +
      geom_vline(xintercept = 0, lty = "dashed") +
      scale_color_manual(values = c("black", "black")) +
      xlab("Effect of public image on expected topic proportion") +
      ylab("") +
      ggtitle("") +
      theme_interaction
  }
)
europhiles_r2 <- panels_r2[[1]]
eurosceptics_r2 <- panels_r2[[2]]

# This is Figure J4
ggpubr::ggarrange(eurosceptics_r2, europhiles_r2, ncol = 1)
ggsave("figures_appendix/figure_j_4.eps", width = 6, height = 6, units = "in")

###############################################################################-
#### Robustness Test 3: 35 Topics ####

# Work on a copy of the baseline dfm whose docvars are cut down to the
# variables listed here.
data_dfm_r3 <- data_dfm
docvars(data_dfm_r3) <- data.frame(docvars(data_dfm_r3)) %>%
  select(
    gov_eu_supporter, image_lag6m_scaled,
    part_of_speech, text_copy, Actor,
    gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any, unanimity_any,
    unemployment_scaled, inflation_scaled, north_south,
    Negotiation_Stage, Council_Config_final
  )

# Drop every document with a missing value in any of the retained docvars.
data_dfm_r3 <- dfm_subset(data_dfm_r3, complete.cases(docvars(data_dfm_r3)))

# STM with 35 instead of 40 topics.
# Unless a re-fit is requested in the settings, the saved model is loaded.
if (run_r3_35_stm != "yes") {
  load("generated_data/stm_out_r3_35.RData")
} else {
  set.seed(1711)
  stm_out_35 <- stm(
    documents = data_dfm_r3,
    K = 35,
    prevalence = ~ gov_eu_supporter * image_lag6m_scaled +
      part_of_speech +
      gov_lr_cmp_static_scaled + eu_receipts_gdp_scaled + budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    data = docvars(data_dfm_r3),
    init.type = "Spectral"
  )
  save(stm_out_35, file = "generated_data/stm_out_r3_35.RData")
}

#### ...Frex Similarity ####

# For each topic, collapse its 20 FREX words into one space-separated string
# (column `complete`) so that every topic can be treated as a short document.
# paste() is vectorised over rows, which replaces the former `1:nrow()` loop
# (that idiom iterates over c(1, 0) when a table has no rows).

# Baseline model: the FREX words sit in columns 3 to 22 of the saved CSV.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- do.call(
  paste,
  c(unname(as.list(frex_main[, 3:22])), sep = " ")
)

# 35-topic model: labelTopics() returns the FREX words as a topic-by-word
# matrix, so the words are in columns 1 to 20. `X` is the topic id.
frex_r3 <- stm::labelTopics(stm_out_35, n = 20)[["frex"]] %>%
  data.frame() %>%
  mutate(X = 1:35)
frex_r3$complete <- do.call(
  paste,
  c(unname(as.list(frex_r3[, 1:20])), sep = " ")
)

# Stack the FREX strings of both models: rows 1-40 are the baseline topics,
# rows 41-75 the topics of the 35-topic model.
all_words <- bind_rows(
  frex_main %>%
    select(X, complete) %>%
    mutate(model = "main"),
  frex_r3 %>%
    select(X, complete) %>%
    mutate(model = "35")
)

# One "document" per topic, then pairwise cosine similarity between documents.
dfmat <- all_words %>%
  corpus(text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2 <- textstat_simil(dfmat, margin = "documents", method = "cosine")

# Long format, one row per ordered pair of distinct topics. Document names
# "text<k>" are turned back into numbers; partners 41-75 are tagged "35" and
# renumbered to 1-35, all other partners get NA in both columns. Only pairs
# whose first member is a baseline topic (1-40) are kept.
tstat2_pw <- tstat2 %>%
  as.matrix() %>%
  melt() %>%
  filter(!is.na(value), X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text")),
    robustness_test = if_else(comptext %in% 41:75, "35", NA_character_),
    comptext = if_else(robustness_test == "35", comptext - 40, NA_real_)
  ) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Heatmap of cosine similarities: baseline topics on the x axis, topics of the
# 35-topic model on the y axis. Only baseline-vs-robustness pairs are drawn.
# NOTE(review): midpoint and limits of the colour scale are computed on the
# whole of `tstat2_pw`, which also contains the baseline-vs-baseline pairs
# that are filtered out of the plot — confirm this is intended.
tstat2_pw %>%
  filter(!is.na(robustness_test)) %>%
  ggplot(aes(x = reorder(as.character(basetext), basetext),
             y = reorder(comptext, comptext),
             fill = value)) +
  geom_tile(color = "white") +
  labs(x = "Topic in Baseline Model", y = "Topic in 35 Topics Model") +
  scale_fill_gradient2(low = "blue", high = "red",
                       midpoint = median(tstat2_pw$value),
                       # `limits` is spelled out in full; the former `limit =`
                       # only worked through partial argument matching.
                       limits = range(tstat2_pw$value),
                       space = "Lab",
                       name = "Cosine Similarity") +
  coord_fixed() +
  theme(axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.title = element_text(size = 12))

# This is Figure J5
ggsave("figures_appendix/figure_j_5.eps", width = 7, height = 7, units = "in")

# For each baseline topic of interest, show the most similar topic of the
# 35-topic model. Baseline-vs-baseline pairs (robustness_test is NA, and so is
# their comptext) are excluded first; otherwise another baseline topic could
# win the maximum and be reported without a usable topic number.
# `basetext` is numeric, so it is compared against numbers, not strings.
tstat2_pw %>%
  filter(!is.na(robustness_test)) %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40)) %>%
  group_by(basetext) %>%
  filter(value == max(value)) %>%
  ungroup() %>%
  arrange(basetext)

#### ...estimateEffect ####
#12 is 12 (0.75)
#13 is 13 (0.9)
#22 is 22 (0.5)
#32 is 32 (0.85)
#38 is gone, highest would be 5 (0.45)
#40 is gone, highest would be 16 (0.15)

# Estimate covariate effects on topics 12, 13, 22 and 32 of the 35-topics model
# (the topics matched to the baseline model in the notes above), or load the
# saved estimates when the setting is not "yes".
# estimateEffect_cluster() is not defined in this part of the file
# (presumably sourced from 99_functions.R - TODO confirm).
if(run_r3_35_estimateEffect=="yes"){
  prep_council_35 <- estimateEffect_cluster(c(12,13,22,32) ~ gov_eu_supporter*image_lag6m_scaled+
                                            part_of_speech+
                                            gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                            unemployment_scaled+inflation_scaled+north_south+
                                            Negotiation_Stage+Council_Config_final,
                                          stm_out_35,nsims = n_simulation,
                                          meta = docvars(data_dfm_r3), uncertainty = "Global")
  # Cache the result so later runs can skip the simulation.
  save(file="generated_data/prep_council_r3_35.RData",prep_council_35)
}else{
  load(file="generated_data/prep_council_r3_35.RData")
}

#### ...regressions ####
# Document-level covariates of the 35-topics sample as a plain data frame.
# NOTE(review): data.sum is not referenced again in this section; presumably
# calc_interaction_regression_stm() reads it as a global - confirm in
# 99_functions.R. It is assigned once here; the identical second assignment
# that used to sit inside the if-branch was redundant and has been removed.
data.sum <- docvars(data_dfm_r3) %>% 
  data.frame()

# Either recompute the interaction regressions for the four matched topics or
# load the saved results (object results_all_r3).
if(run_r3_35_regressions=="yes"){
  
  #### Regression Topic 12 ###
  tab_12 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 12,
                                        estimate_object = prep_council_35)
  
  #### Regression Topic 13 ###
  tab_13 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 13,
                                        estimate_object = prep_council_35)
  
  #### Regression Topic 22 ###
  tab_22 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 22,
                                        estimate_object = prep_council_35)
  
  #### Regression Topic 32 ###
  tab_32 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 32,
                                        estimate_object = prep_council_35)
  
  # Stack the per-topic tables in topic order and label them. The label vector
  # has eight entries, so this assumes two rows per table (mutate() fails
  # otherwise). upper/lower are the estimate +/- 1.96 standard errors.
  results_all <- bind_rows(tab_12,tab_13,tab_22,
                           tab_32) %>% 
    mutate(Topic_name = c(rep("Delaying agreement",2),
                          rep("Formulating a demand",2),
                          rep("Supporting the compromise",2),
                          rep("More technical-level discussion needed",2))) %>% 
    mutate(upper = Estimate + 1.96*Std.Error,
           lower = Estimate - 1.96*Std.Error)
  
  # Replace the raw moderator codes by readable government labels.
  results_all$moderator_level <- results_all$moderator_level %>% 
    recode("baseline" = "Pro-EU Government",
           "gov_eu_supporterEurosceptic Government:image_lag6m_scaled" = "Eurosceptic Government")
  results_all_r3 <- results_all
  save(file="generated_data/regression_results_r3.RData",results_all_r3)
}else{
  print("Loading saved regression interaction")
  load(file="generated_data/regression_results_r3.RData")
}

# This is Table J4
# Drop the interval bounds, give the columns print-ready names and render the
# table with five digits.
results_all_r3 %>%
  select(-upper, -lower) %>%
  dplyr::rename(`Government Ideology` = moderator_level,
                `Topic Label` = Topic_name,
                `t-value` = tval,
                `p-value` = p) %>%
  xtable::xtable(digits = 5)

# Coefficient plot for the "Pro-EU Government" rows of the 35-topics results:
# point estimate with lower/upper interval per topic, faceted by the
# hypothesis the topic is assigned to.
europhiles_r3 <- results_all_r3 %>%
  dplyr::rename(term = Topic_name,
                model = moderator_level,
                estimate = Estimate,
                std.error = Std.Error) %>%
  mutate(feature = factor(case_when(
    term == "Supporting the compromise" ~ "Hypothesis 2",
    term %in% c("Delaying agreement",
                "More technical-level discussion needed") ~ "Hypothesis 3",
    term == "Formulating a demand" ~ "Hypothesis 4"
  ))) %>%
  filter(model == "Pro-EU Government") %>%
  ggplot(aes(x = estimate, xmin = lower, xmax = upper,
             y = term, color = model)) +
  geom_point() +
  geom_errorbarh(height = 0) +
  facet_grid(feature ~ model, scales = "free_y", space = "free") +
  theme_bw() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("black", "black")) +
  labs(x = "Effect of public image on expected topic proportion",
       y = "",
       title = "") +
  theme_interaction

# Same coefficient plot as for the pro-EU governments, restricted to the
# "Eurosceptic Government" rows of the 35-topics results.
eurosceptics_r3 <- results_all_r3 %>%
  dplyr::rename(term = Topic_name,
                model = moderator_level,
                estimate = Estimate,
                std.error = Std.Error) %>%
  mutate(feature = factor(case_when(
    term == "Supporting the compromise" ~ "Hypothesis 2",
    term %in% c("Delaying agreement",
                "More technical-level discussion needed") ~ "Hypothesis 3",
    term == "Formulating a demand" ~ "Hypothesis 4"
  ))) %>%
  filter(model == "Eurosceptic Government") %>%
  ggplot(aes(x = estimate, xmin = lower, xmax = upper,
             y = term, color = model)) +
  geom_point() +
  geom_errorbarh(height = 0) +
  facet_grid(feature ~ model, scales = "free_y", space = "free") +
  theme_bw() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("black", "black")) +
  labs(x = "Effect of public image on expected topic proportion",
       y = "",
       title = "") +
  theme_interaction

# This is Figure J6
# Eurosceptic panel on top, pro-EU panel below.
ggpubr::ggarrange(eurosceptics_r3, europhiles_r3, ncol = 1)
ggsave("figures_appendix/figure_j_6.eps", width = 6, height = 6, units = "in")

###############################################################################-
#### Robustness Test 4: 45 Topics ####

# Sample for the 45-topics test: copy the main dfm, keep only the document
# variables listed below, and drop every document with a missing value in any
# of them.
data_dfm_r4 <- data_dfm
docvars(data_dfm_r4) <- docvars(data_dfm_r4) %>% 
  data.frame() %>% 
  select(gov_eu_supporter,image_lag6m_scaled,
         part_of_speech,text_copy,Actor,
         gov_lr_cmp_static_scaled,eu_receipts_gdp_scaled,budget_any,unanimity_any,
         unemployment_scaled,inflation_scaled,north_south,
         Negotiation_Stage,Council_Config_final)
# complete.cases() is evaluated on all selected docvars, including the ones
# that do not enter the prevalence formula (text_copy, Actor).
data_dfm_r4 <- dfm_subset(data_dfm_r4,complete.cases(docvars(data_dfm_r4)))

# Fit the structural topic model with K = 45 topics (spectral initialisation,
# fixed seed), or load the saved model when the setting is not "yes".
if(run_r4_45_stm == "yes"){
  set.seed(1711)
  stm_out_45 <- stm(documents = data_dfm_r4,K=45,prevalence=~ gov_eu_supporter*image_lag6m_scaled+
                      part_of_speech+
                      gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                      unemployment_scaled+inflation_scaled+north_south+
                      Negotiation_Stage+Council_Config_final,
                    data = docvars(data_dfm_r4),
                    init.type = "Spectral")
  # Cache the fitted model for later runs.
  save(file="generated_data/stm_out_r4_45.RData",stm_out_45)
}else{
  load(file="generated_data/stm_out_r4_45.RData")
}

#### ...Frex Similarity ####
# FREX words of the baseline 40-topics model. Columns 3 to 22 of the CSV are
# joined row by row into one space-separated string ("complete").
# The rows are visited with seq_len() instead of 1:nrow(), so an empty table
# yields an empty column instead of an indexing error.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- vapply(
  seq_len(nrow(frex_main)),
  function(i) paste(frex_main[i, 3:22], collapse = " "),
  character(1)
)

# Top 20 FREX words per topic of the 45-topics model, joined the same way.
# X is the topic number within that model.
frex_r4 <- stm::labelTopics(stm_out_45, n = 20)
frex_r4 <- frex_r4[["frex"]] %>% data.frame() %>% mutate(X = 1:45)
frex_r4$complete <- vapply(
  seq_len(nrow(frex_r4)),
  function(i) paste(frex_r4[i, 1:20], collapse = " "),
  character(1)
)

# Stack both sets: baseline topics first, then the 45-topics model. The row
# order matters because the similarity step below identifies topics by
# document position (1-40 = baseline, 41-85 = 45-topics model).
all_words <- bind_rows(frex_main %>% 
                         select(X, complete) %>% 
                         mutate(model = "main"),
                       frex_r4 %>% 
                         select(X, complete) %>% 
                         mutate(model = "45"))

# Turn every FREX word list in all_words into one document and compute the
# cosine similarity between all pairs of these documents.
dfmat <- all_words %>%
  corpus(text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2 <- textstat_simil(dfmat, margin = "documents", method = "cosine")

# Long format with one row per ordered pair of distinct documents.
# Documents 1-40 are the baseline topics (basetext); documents 41-85 are the
# topics of the 45-topics model and are renumbered to 1-45 (comptext).
# Pairs whose second document is not in 41-85 keep NA in robustness_test and
# comptext.
tstat2_pw <- melt(as.matrix(tstat2)) %>%
  filter(!is.na(value), X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text")),
    robustness_test = if_else(comptext %in% 41:85, "45", NA_character_),
    comptext = if_else(robustness_test == "45", comptext - 40, NA_real_)
  ) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Heatmap of FREX cosine similarity: baseline topics on the x-axis, topics of
# the 45-topics model on the y-axis. Midpoint and limits of the colour scale
# are computed from all rows of tstat2_pw, not only from the plotted rows.
tstat2_pw %>%
  filter(!is.na(robustness_test)) %>%
  ggplot(aes(x = reorder(as.character(basetext), basetext),
             y = reorder(comptext, comptext),
             fill = value)) +
  geom_tile(color = "white") +
  labs(x = "Topic in Baseline Model", y = "Topic in 45 Topics Model") +
  scale_fill_gradient2(name = "Cosine Similarity",
                       low = "blue", high = "red",
                       midpoint = median(tstat2_pw$value),
                       limits = range(tstat2_pw$value),
                       space = "Lab") +
  coord_fixed() +
  theme(axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.title = element_text(size = 12))

# This is Figure J7
ggsave("figures_appendix/figure_j_7.eps", width = 7, height = 7, units = "in")

# For the baseline topics of interest, print every pair with a cosine
# similarity above 0.3.
tstat2_pw %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40),
         value > 0.3) %>%
  arrange(basetext)

#### ...estimateEffect ####

# 12 is 12 (0.65)
# 13 is 13 (0.85)
# 22 is 22 (0.65)
# 32 is 32 (0.85)
# 38 is 38 (0.75)
# 40 is gone
# 42 is new topic

# Estimate covariate effects on topics 12, 13, 22, 32, 38 and 42 of the
# 45-topics model (see the matching notes above), or load the saved estimates
# when the setting is not "yes".
# estimateEffect_cluster() is not defined in this part of the file
# (presumably sourced from 99_functions.R - TODO confirm).
if(run_r4_45_estimateEffect=="yes"){
  prep_council_45 <- estimateEffect_cluster(c(12,13,22,32,38,42) ~ gov_eu_supporter*image_lag6m_scaled+
                                              part_of_speech+
                                              gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                              unemployment_scaled+inflation_scaled+north_south+
                                              Negotiation_Stage+Council_Config_final,
                                            stm_out_45,nsims = n_simulation,
                                            meta = docvars(data_dfm_r4), uncertainty = "Global")
  # Cache the result so later runs can skip the simulation.
  save(file="generated_data/prep_council_r4_45.RData",prep_council_45)
}else{
  load(file="generated_data/prep_council_r4_45.RData")
}

#### ...regressions ####
# Document-level covariates of the 45-topics sample as a plain data frame.
# NOTE(review): data.sum is not referenced again in this section; presumably
# calc_interaction_regression_stm() reads it as a global - confirm in
# 99_functions.R.
data.sum <- docvars(data_dfm_r4) %>% 
  data.frame()

# Either recompute the interaction regressions for the six topics or load the
# saved results (object results_all_r4).
if(run_r4_45_regressions=="yes"){
  
  #### Regression Topic 12 ###
  tab_12 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 12,
                                        estimate_object = prep_council_45)
  
  #### Regression Topic 13 ###
  tab_13 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 13,
                                        estimate_object = prep_council_45)
  
  #### Regression Topic 22 ###
  tab_22 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 22,
                                        estimate_object = prep_council_45)
  
  #### Regression Topic 32 ###
  tab_32 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 32,
                                        estimate_object = prep_council_45)
  
  #### Regression Topic 38 ###
  tab_38 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 38,
                                        estimate_object = prep_council_45)
  
  #### Regression Topic 42 ###
  tab_42 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                        var_mod = "gov_eu_supporter",
                                        topic = 42,
                                        estimate_object = prep_council_45)
  
  # Stack the per-topic tables in topic order and label them. The label vector
  # has twelve entries, so this assumes two rows per table (mutate() fails
  # otherwise). upper/lower are the estimate +/- 1.96 standard errors.
  results_all <- bind_rows(tab_12,tab_13,tab_22,
                           tab_32,tab_38,tab_42) %>% 
    mutate(Topic_name = c(rep("Delaying agreement",2),
                          rep("Formulating a demand",2),
                          rep("Supporting the compromise",2),
                          rep("More technical-level discussion needed",2),
                          rep("Cautious language",2),
                          rep("Cautious support of the proposal",2))) %>% 
    mutate(upper = Estimate + 1.96*Std.Error,
           lower = Estimate - 1.96*Std.Error)
  
  # Replace the raw moderator codes by readable government labels.
  results_all$moderator_level <- results_all$moderator_level %>% 
    recode("baseline" = "Pro-EU Government",
           "gov_eu_supporterEurosceptic Government:image_lag6m_scaled" = "Eurosceptic Government")
  results_all_r4 <- results_all
  save(file="generated_data/regression_results_r4.RData",results_all_r4)
}else{
  print("Loading saved regression interaction")
  load(file="generated_data/regression_results_r4.RData")
}

# This is Table J5
# Drop the interval bounds, give the columns print-ready names and render the
# table with five digits.
results_all_r4 %>%
  select(-upper, -lower) %>%
  dplyr::rename(`Government Ideology` = moderator_level,
                `Topic Label` = Topic_name,
                `t-value` = tval,
                `p-value` = p) %>%
  xtable::xtable(digits = 5)

# Coefficient plot for the "Pro-EU Government" rows of the 45-topics results:
# point estimate with lower/upper interval per topic, faceted by the
# hypothesis the topic is assigned to.
europhiles_r4 <- results_all_r4 %>%
  dplyr::rename(term = Topic_name,
                model = moderator_level,
                estimate = Estimate,
                std.error = Std.Error) %>%
  mutate(feature = factor(case_when(
    term %in% c("Supporting the compromise",
                "Cautious support of the proposal") ~ "Hypothesis 2",
    term %in% c("Delaying agreement",
                "More technical-level discussion needed",
                "Cautious language") ~ "Hypothesis 3",
    term == "Formulating a demand" ~ "Hypothesis 4"
  ))) %>%
  filter(model == "Pro-EU Government") %>%
  ggplot(aes(x = estimate, xmin = lower, xmax = upper,
             y = term, color = model)) +
  geom_point() +
  geom_errorbarh(height = 0) +
  facet_grid(feature ~ model, scales = "free_y", space = "free") +
  theme_bw() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("black", "black")) +
  labs(x = "Effect of public image on expected topic proportion",
       y = "",
       title = "") +
  theme_interaction

# Same coefficient plot as for the pro-EU governments, restricted to the
# "Eurosceptic Government" rows of the 45-topics results.
eurosceptics_r4 <- results_all_r4 %>%
  dplyr::rename(term = Topic_name,
                model = moderator_level,
                estimate = Estimate,
                std.error = Std.Error) %>%
  mutate(feature = factor(case_when(
    term %in% c("Supporting the compromise",
                "Cautious support of the proposal") ~ "Hypothesis 2",
    term %in% c("Delaying agreement",
                "More technical-level discussion needed",
                "Cautious language") ~ "Hypothesis 3",
    term == "Formulating a demand" ~ "Hypothesis 4"
  ))) %>%
  filter(model == "Eurosceptic Government") %>%
  ggplot(aes(x = estimate, xmin = lower, xmax = upper,
             y = term, color = model)) +
  geom_point() +
  geom_errorbarh(height = 0) +
  facet_grid(feature ~ model, scales = "free_y", space = "free") +
  theme_bw() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("black", "black")) +
  labs(x = "Effect of public image on expected topic proportion",
       y = "",
       title = "") +
  theme_interaction

# This is Figure J8
# Eurosceptic panel on top, pro-EU panel below.
ggpubr::ggarrange(eurosceptics_r4, europhiles_r4, ncol = 1)
ggsave("figures_appendix/figure_j_8.eps", width = 6, height = 6, units = "in")

#######################################-
#### Robustness Test 5: EMU control ####
#######################################-

# Sample for the EMU-control test: copy the main dfm, keep the document
# variables listed below and add the indicator "emu".
# emu is "no" for the nine listed actors throughout, and for LT, LV and EE
# before the respective cut-off date; it is "yes" for everything else.
# NOTE(review): the date comparisons assume date_correct is a Date (or an
# ISO-formatted string) so that comparing with "YYYY-MM-DD" works - confirm.
data_dfm_r5 <- data_dfm
docvars(data_dfm_r5) <- docvars(data_dfm_r5) %>%
  data.frame() %>%
  select(gov_eu_supporter, image_lag6m_scaled, date_correct, Transcription,
         final_two_months,
         part_of_speech, text_copy, text_original, Actor,
         gov_lr_cmp_static_scaled, eu_receipts_gdp_scaled, budget_any,
         unanimity_any,
         unemployment_scaled, inflation_scaled, north_south,
         Negotiation_Stage, Council_Config_final) %>%
  mutate(emu = case_when(
    Actor %in% c("SE", "PL", "CZ", "DK", "RO", "HR", "HU", "BG", "UK") ~ "no",
    Actor == "LT" & date_correct < "2015-01-01" ~ "no",
    Actor == "LV" & date_correct < "2014-01-01" ~ "no",
    Actor == "EE" & date_correct < "2011-01-01" ~ "no",
    TRUE ~ "yes"
  ))
# Keep only documents without missing values in any of the docvars above.
data_dfm_r5 <- dfm_subset(data_dfm_r5, complete.cases(docvars(data_dfm_r5)))

# Fit the 40-topics structural topic model with the additional prevalence
# covariate emu (spectral initialisation, fixed seed), or load the saved model
# when the setting is not "yes".
if(run_r5_emu_stm == "yes"){
  set.seed(1711)
  stm_out_40_control_emu_model <- stm(documents = data_dfm_r5,K=40,
                                      prevalence=~ gov_eu_supporter*image_lag6m_scaled+
                                        part_of_speech+emu+
                                        gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                        unemployment_scaled+inflation_scaled+north_south+
                                        Negotiation_Stage+Council_Config_final,
                                      data = docvars(data_dfm_r5),
                                      init.type = "Spectral")
  # Cache the fitted model for later runs.
  save(file="generated_data/stm_out_r5_emu.RData",stm_out_40_control_emu_model)
}else{
  load(file="generated_data/stm_out_r5_emu.RData")
}

#### ...Frex Similarity ####
# FREX words of the baseline 40-topics model. Columns 3 to 22 of the CSV are
# joined row by row into one space-separated string ("complete").
# The rows are visited with seq_len() instead of 1:nrow(), so an empty table
# yields an empty column instead of an indexing error.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- vapply(
  seq_len(nrow(frex_main)),
  function(i) paste(frex_main[i, 3:22], collapse = " "),
  character(1)
)

# Top 20 FREX words per topic of the EMU-control model, joined the same way.
# X is the topic number within that model.
frex_r5 <- stm::labelTopics(stm_out_40_control_emu_model, n = 20)
frex_r5 <- frex_r5[["frex"]] %>% data.frame() %>% mutate(X = 1:40)
frex_r5$complete <- vapply(
  seq_len(nrow(frex_r5)),
  function(i) paste(frex_r5[i, 1:20], collapse = " "),
  character(1)
)

# Stack both sets: baseline topics first, then the EMU-control model. The row
# order matters because the similarity step below identifies topics by
# document position (1-40 = baseline, 41-80 = EMU-control model).
all_words_r5 <- bind_rows(frex_main %>% 
                            select(X, complete) %>% 
                            mutate(model = "main"),
                          frex_r5 %>% 
                            select(X, complete) %>% 
                            mutate(model = "emu"))

# Turn every FREX word list in all_words_r5 into one document and compute the
# cosine similarity between all pairs of these documents.
dfmat_r5 <- all_words_r5 %>%
  corpus(text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2_r5 <- textstat_simil(dfmat_r5, margin = "documents", method = "cosine")

# Long format with one row per ordered pair of distinct documents.
# Documents 1-40 are the baseline topics (basetext); documents 41-80 are the
# topics of the EMU-control model and are renumbered to 1-40 (comptext).
# Pairs whose second document is not in 41-80 keep NA in robustness_test and
# comptext.
tstat2_pw_r5 <- melt(as.matrix(tstat2_r5)) %>%
  filter(!is.na(value), X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text")),
    robustness_test = if_else(comptext %in% 41:80, "emu", NA_character_),
    comptext = if_else(robustness_test == "emu", comptext - 40, NA_real_)
  ) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Heatmap of FREX cosine similarity: baseline topics on the x-axis, topics of
# the EMU-control model on the y-axis.
# Midpoint and limits of the colour scale are taken from tstat2_pw_r5, the
# data of this robustness test. They were previously read from tstat2_pw,
# which at this point still holds the similarities of the 45-topics test, so
# EMU-model values could fall outside the scale limits.
tstat2_pw_r5 %>% 
  filter(!is.na(robustness_test)) %>% 
  ggplot(aes(x = reorder(as.character(basetext), basetext),
             y = reorder(comptext, comptext),
             fill = value)) + 
  geom_tile(color = "white") +
  labs(x = "Topic in Baseline Model", y = "Topic in EMU Model") +
  scale_fill_gradient2(low = "blue", high = "red", 
                       midpoint = median(tstat2_pw_r5$value), 
                       # `limits` spelled out instead of relying on partial
                       # matching of `limit`.
                       limits = range(tstat2_pw_r5$value), 
                       space = "Lab", 
                       name = "Cosine Similarity") +
  coord_fixed() + 
  theme(axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8),
        axis.title = element_text(size = 12))

# This is Figure J9
ggsave("figures_appendix/figure_j_9.eps", width = 7, height = 7, units = "in")

# For each baseline topic of interest, print the row(s) of tstat2_pw_r5 with
# the highest cosine similarity.
tstat2_pw_r5 %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40)) %>%
  group_by(basetext) %>%
  filter(value == max(value))

# Test: Is there a new citizen-related topic?
# Lists EMU-model topics whose FREX string contains "vote", "citizen" or
# "public".
all_words_r5 %>%
  filter(model == "emu",
         grepl("vote|citizen|public", complete))

#### ...estimateEffect ####

# Estimate covariate effects on all 40 topics of the EMU-control model (the
# formula includes emu), or load the saved estimates when the setting is not
# "yes". All topics are estimated because the emu coefficient is extracted
# for every topic in the regressions step further down.
# estimateEffect_cluster() is not defined in this part of the file
# (presumably sourced from 99_functions.R - TODO confirm).
if(run_r5_emu_estimateEffect=="yes"){
  prep_council_emu <- estimateEffect_cluster(1:40 ~ gov_eu_supporter*image_lag6m_scaled+
                                                               part_of_speech+emu+
                                                               gov_lr_cmp_static_scaled+eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                                               unemployment_scaled+inflation_scaled+north_south+
                                                               Negotiation_Stage+Council_Config_final,
                                                             stm_out_40_control_emu_model,nsims = n_simulation,
                                                             meta = docvars(data_dfm_r5), uncertainty = "Global")
  # Cache the result so later runs can skip the simulation.
  save(file="generated_data/prep_council_r5_emu.RData",prep_council_emu)
}else{
  load(file="generated_data/prep_council_r5_emu.RData")
}

#### ...regressions ####
# Document-level covariates of the EMU-control sample as a plain data frame.
# NOTE(review): data.sum is not referenced again in this section; presumably
# calc_interaction_regression_stm() reads it as a global - confirm in
# 99_functions.R.
data.sum <- docvars(data_dfm_r5) %>% 
  data.frame()

# Either recompute (a) the direct emu coefficient for all 40 topics and
# (b) the interaction regressions for the six topics of interest, or load the
# saved results (objects results_all_r5 and all_reg_emu).
if(run_r5_emu_regressions=="yes"){
  
  # First element of the summary object; its $tables entry is used as a list
  # with one coefficient table per topic.
  mod_top_all <- summary.estimateEffect_comb(prep_council_emu,topics = 1:40)[[1]]
  
  all_reg <- mod_top_all$tables
  # Container for the emu coefficient and its standard error per topic.
  all_reg_emu <- data.frame(topic = 1:40,
                            est = NA,
                            se = NA)
  
  # Pull the "emuyes" row out of each topic's coefficient table.
  for (i in 1:nrow(all_reg_emu)){
    mod_save <- all_reg[[i]] %>% as.data.frame() %>% mutate(term=row.names(.))
    all_reg_emu$est[i] <- mod_save %>% filter(term == "emuyes") %>% select(Estimate) %>% unlist()
    all_reg_emu$se[i] <- mod_save %>% filter(term == "emuyes") %>% select(`Std. Error`) %>% unlist()
  }
  
  # Interval bounds: estimate +/- 1.96 standard errors.
  all_reg_emu$lower = all_reg_emu$est - 1.96*all_reg_emu$se
  all_reg_emu$upper = all_reg_emu$est + 1.96*all_reg_emu$se
  
  #### Regression Topic 12 ###
  tab_12 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                            var_mod = "gov_eu_supporter",
                                            topic = 12,
                                            estimate_object = prep_council_emu)
  
  #### Regression Topic 13 ###
  tab_13 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                            var_mod = "gov_eu_supporter",
                                            topic = 13,
                                            estimate_object = prep_council_emu)
  
  #### Regression Topic 22 ###
  tab_22 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                            var_mod = "gov_eu_supporter",
                                            topic = 22,
                                            estimate_object = prep_council_emu)
  
  #### Regression Topic 32 ###
  tab_32 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                            var_mod = "gov_eu_supporter",
                                            topic = 32,
                                            estimate_object = prep_council_emu)
  
  #### Regression Topic 38 ###
  tab_38 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                            var_mod = "gov_eu_supporter",
                                            topic = 38,
                                            estimate_object = prep_council_emu)
  
  #### Regression Topic 40 ###
  tab_40 <- calc_interaction_regression_stm(var_1 = "image_lag6m_scaled",
                                            var_mod = "gov_eu_supporter",
                                            topic = 40,
                                            estimate_object = prep_council_emu)
  
  # Stack the per-topic tables in topic order and label them. The label vector
  # has twelve entries, so this assumes two rows per table (mutate() fails
  # otherwise). upper/lower are the estimate +/- 1.96 standard errors.
  results_all_control_emu <- bind_rows(tab_12,tab_13,tab_22,
                                       tab_32,tab_38,tab_40) %>% 
    mutate(Topic_name = c(rep("Delaying agreement",2),
                          rep("Formulating a demand",2),
                          rep("Supporting the compromise",2),
                          rep("More technical-level discussion needed",2),
                          rep("Cautious language",2),
                          rep("Raising a concern",2))) %>% 
    mutate(upper = Estimate + 1.96*Std.Error,
           lower = Estimate - 1.96*Std.Error)
  
  # Replace the raw moderator codes by readable government labels.
  results_all_control_emu$moderator_level <- results_all_control_emu$moderator_level %>% 
    recode("baseline" = "Pro-EU Government",
           "gov_eu_supporterEurosceptic Government:image_lag6m_scaled" = "Eurosceptic Government")
  
  results_all_r5 <- results_all_control_emu
  # Both result objects go into the same file.
  save(file="generated_data/regression_results_r5.RData",results_all_r5,all_reg_emu)
}else{
  print("Loading saved regression interaction")
  load(file="generated_data/regression_results_r5.RData")
}

# This is Table J6
# Drop the interval bounds, give the columns print-ready names and render the
# table with five digits.
results_all_r5 %>%
  select(-upper, -lower) %>%
  dplyr::rename(`Government Ideology` = moderator_level,
                `Topic Label` = Topic_name,
                `t-value` = tval,
                `p-value` = p) %>%
  xtable::xtable(digits = 5)

# Attach a display label to each of the 40 topic numbers in all_reg_emu.
# The "\n" inside some labels produces a line break in the axis text of the
# plot built from this column.
all_reg_emu$topic_name<- all_reg_emu$topic %>% 
  recode("1"  = "Topic 1: Thanking I (rather specific person)",
         "2"  = "Topic 2: Exchange of information",
         "3"  = "Topic 3: Internal market",
         "4"  = "Topic 4: Ships",
         "5"  = "Topic 5: Reflection",
         "6"  = "Topic 6: Thanking II (rather abstract groups)",
         "7"  = "Topic 7: European judicial matters / \nEuropean public prosecutor",
         "8"  = "Topic 8: Referring to Commissioner",
         "9"  = "Topic 9: Banking supervision",
         "10" = "Topic 10: Burdens of implementation \n(esp. environment)",
         "11" = "Topic 11: Renewable energy and climate",
         "12" = "Topic 12: Delaying agreement",
         "13" = "Topic 13: Formulating a demand",
         "14" = "Topic 14: Invasive species",
         "15" = "Topic 15: GMOs",
         "16" = "Topic 16: Crime",
         "17" = "Topic 17: Budget",
         "18" = "Topic 18: Talking about legal text",
         "19" = "Topic 19: International money laundering",
         "20" = "Topic 20: Governance of four freedoms \n(e.g. capital, workers)",
         "21" = "Topic 21: Health and medical devices",
         "22" = "Topic 22: Supporting the compromise",
         "23" = "Topic 23: Congratulating",
         "24" = "Topic 24: Legal harmonization",
         "25" = "Topic 25: Cooperation between member states",
         "26" = "Topic 26: Air emissions and pollutants",
         "27" = "Topic 27: Tax evasion and fraud",
         "28" = "Topic 28: Banking union (esp. \nresolution and deposit insurance)",
         "29" = "Topic 29: Data protection",
         "30" = "Topic 30: Public procurement and tobacco",
         "31" = "Topic 31: Financial crisis",
         "32" = "Topic 32: More technical-level discussion needed",
         "33" = "Topic 33: Research and development \n(e.g. Horizon 2020)",
         "34" = "Topic 34: Audit and control",
         "35" = "Topic 35: Talking about reaching compromise",
         "36" = "Topic 36: Negotiations with EP",
         "37" = "Topic 37: Brief intervention",
         "38" = "Topic 38: Cautious language",
         "39" = "Topic 39: Affirmation",
         "40" = "Topic 40: Raising a concern"
  )

# Coefficient plot of the emu estimate for every topic, with the lower/upper
# interval from all_reg_emu. Topics are ordered by descending topic number
# along the y-axis factor, so topic 1 ends up at the top.
emu_direct_plot <- ggplot(all_reg_emu,
                          aes(x = est, xmin = lower, xmax = upper,
                              y = reorder(topic_name, -topic))) +
  geom_point() +
  geom_errorbarh(height = 0) +
  theme_bw() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("black", "black")) +
  labs(x = "Effect of Euro-Membership on expected topic proportion",
       y = "",
       title = "") +
  theme(plot.title = element_text(face = "bold"),
        legend.position = "none",
        axis.text = element_text(colour = "black"),
        axis.title = element_text(size = 8, colour = "black"),
        strip.text.y = element_text(angle = 0, face = "bold"),
        strip.background = element_rect(fill = "white"),
        axis.ticks = element_blank(),
        panel.background = element_blank(),
        panel.spacing.x = unit(0, "line"))

# This is Figure J11
emu_direct_plot
ggsave("figures_appendix/figure_j_11.eps", width = 6, height = 7.5, units = "in")

# Coefficient plot for the "Pro-EU Government" rows of the EMU-control
# results: point estimate with lower/upper interval per topic, faceted by the
# hypothesis the topic is assigned to.
europhiles_r5 <- results_all_r5 %>%
  dplyr::rename(term = Topic_name,
                model = moderator_level,
                estimate = Estimate,
                std.error = Std.Error) %>%
  mutate(feature = factor(case_when(
    term == "Supporting the compromise" ~ "Hypothesis 2",
    term %in% c("Delaying agreement",
                "More technical-level discussion needed",
                "Cautious language") ~ "Hypothesis 3",
    term %in% c("Formulating a demand",
                "Raising a concern") ~ "Hypothesis 4"
  ))) %>%
  filter(model == "Pro-EU Government") %>%
  ggplot(aes(x = estimate, xmin = lower, xmax = upper,
             y = term, color = model)) +
  geom_point() +
  geom_errorbarh(height = 0) +
  facet_grid(feature ~ model, scales = "free_y", space = "free") +
  theme_bw() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("black", "black")) +
  labs(x = "Effect of public image on expected topic proportion",
       y = "",
       title = "") +
  theme_interaction

# Same coefficient plot as for the pro-EU governments, restricted to the
# "Eurosceptic Government" rows of the EMU-control results.
eurosceptics_r5 <- results_all_r5 %>%
  dplyr::rename(term = Topic_name,
                model = moderator_level,
                estimate = Estimate,
                std.error = Std.Error) %>%
  mutate(feature = factor(case_when(
    term == "Supporting the compromise" ~ "Hypothesis 2",
    term %in% c("Delaying agreement",
                "More technical-level discussion needed",
                "Cautious language") ~ "Hypothesis 3",
    term %in% c("Formulating a demand",
                "Raising a concern") ~ "Hypothesis 4"
  ))) %>%
  filter(model == "Eurosceptic Government") %>%
  ggplot(aes(x = estimate, xmin = lower, xmax = upper,
             y = term, color = model)) +
  geom_point() +
  geom_errorbarh(height = 0) +
  facet_grid(feature ~ model, scales = "free_y", space = "free") +
  theme_bw() +
  geom_vline(xintercept = 0, linetype = "dashed") +
  scale_color_manual(values = c("black", "black")) +
  labs(x = "Effect of public image on expected topic proportion",
       y = "",
       title = "") +
  theme_interaction

# This is Figure J10
# Eurosceptic panel on top, pro-EU panel below.
ggpubr::ggarrange(eurosceptics_r5, europhiles_r5, ncol = 1)
ggsave("figures_appendix/figure_j_10.eps", width = 6, height = 6, units = "in")

#######################################-
#### Robustness Test 6: Excluding LR ####
#######################################-
# Sample for the test without the left-right control: copy the main dfm and
# keep only the document variables listed below.
data_dfm_r6 <- data_dfm
docvars(data_dfm_r6) <- docvars(data_dfm_r6) %>% 
  data.frame() %>% 
  select(gov_eu_supporter,image_lag6m_scaled,date_correct,Transcription,large_small,
         part_of_speech,text_copy,text_original,Actor,gov_eu_cmp_static,
         gov_lr_cmp_static_scaled,eu_receipts_gdp_scaled,budget_any,unanimity_any,
         unemployment_scaled,inflation_scaled,north_south,
         Negotiation_Stage,Council_Config_final) 

# Drop documents with a missing value in any selected docvar. Note that
# gov_lr_cmp_static_scaled is still among them, so documents lacking a
# left-right value are removed although the model below omits that covariate.
data_dfm_r6 <- dfm_subset(data_dfm_r6,complete.cases(docvars(data_dfm_r6)))

# Fit the 40-topics structural topic model without gov_lr_cmp_static_scaled
# in the prevalence formula (spectral initialisation, fixed seed), or load the
# saved model when the setting is not "yes".
if(run_r6_lr_stm == "yes"){
  set.seed(1711)
  stm_out_40_left_right_model <- stm(documents = data_dfm_r6,K=40,
                                     prevalence=~ gov_eu_supporter*image_lag6m_scaled+
                                       part_of_speech+
                                       eu_receipts_gdp_scaled+budget_any+unanimity_any+
                                       unemployment_scaled+inflation_scaled+north_south+
                                       Negotiation_Stage+Council_Config_final,
                                     data = docvars(data_dfm_r6),
                                     init.type = "Spectral")
  # Cache the fitted model for later runs.
  save(file="generated_data/stm_out_r6_lr.RData",stm_out_40_left_right_model)
}else{
  load(file="generated_data/stm_out_r6_lr.RData")
}

#### ...Frex Similarity ####
# Baseline model: collapse the 20 FREX word columns (columns 3 to 22) of each
# topic into one space-separated string. The paste is vectorised over rows, so
# no 1:nrow() loop is needed and an empty table yields an empty column.
frex_main <- read.csv("generated_data/frex_words_40_main_model.csv")
frex_main$complete <- do.call(
  paste,
  c(unname(as.list(frex_main[, 3:22])), sep = " ")
)

# Model without left-right position: same representation, built from the 20
# FREX words per topic returned by labelTopics(); X holds the topic number.
frex_r6 <- stm::labelTopics(stm_out_40_left_right_model, n = 20)[["frex"]] %>%
  data.frame() %>%
  mutate(X = 1:40)
frex_r6$complete <- do.call(
  paste,
  c(unname(as.list(frex_r6[, 1:20])), sep = " ")
)

# Stack both models: rows 1-40 are the baseline ("main"), rows 41-80 are the
# model without left-right position ("lr").
all_words <- bind_rows(
  frex_main %>%
    select(X, complete) %>%
    mutate(model = "main"),
  frex_r6 %>%
    select(X, complete) %>%
    mutate(model = "lr")
)

# Pairwise cosine similarity between the word lists of all 80 topics.
dfmat <- corpus(all_words, text_field = "complete") %>%
  tokens() %>%
  dfm()
tstat2 <- textstat_simil(dfmat, method = "cosine", margin = "documents")

# Long format, one row per ordered pair of distinct topics. melt() is called
# as reshape::melt() because the X1/X2 column names used below are the ones
# produced by reshape; a masking melt() from another attached package would
# name them differently.
tstat2_pw <- as.matrix(tstat2) %>%
  reshape::melt() %>%
  filter(!is.na(value), X1 != X2) %>%
  mutate(
    basetext = as.numeric(str_remove(X1, "text")),
    comptext = as.numeric(str_remove(X2, "text"))
  ) %>%
  # Documents 41-80 belong to the "lr" model; pairs with another baseline
  # topic keep NA in robustness_test (and therefore in comptext).
  mutate(robustness_test = case_when(
    comptext %in% 41:80 ~ "lr"
  )) %>%
  # Map document numbers 41-80 back to topic numbers 1-40.
  mutate(comptext = case_when(
    robustness_test == "lr" ~ comptext - 40
  )) %>%
  filter(basetext %in% 1:40) %>%
  arrange(robustness_test, comptext)

# Heatmap of the cosine similarity between baseline topics (x axis) and topics
# of the model without left-right position (y axis). Only cross-model pairs are
# drawn; the colour midpoint and limits are taken from all rows of tstat2_pw.
tstat2_pw %>%
  filter(!is.na(robustness_test)) %>%
  ggplot(aes(
    x = reorder(as.character(basetext), basetext),
    y = reorder(comptext, comptext),
    fill = value
  )) +
  geom_tile(color = "white") +
  labs(
    x = "Topic in Baseline Model",
    y = "Topic in Model without Left-Right Position"
  ) +
  scale_fill_gradient2(
    low = "blue",
    high = "red",
    midpoint = median(tstat2_pw$value),
    # Spelled out as `limits`; `limit` only worked via partial matching.
    limits = range(tstat2_pw$value),
    space = "Lab",
    name = "Cosine Similarity"
  ) +
  coord_fixed() +
  theme(
    axis.text.y = element_text(size = 8),
    axis.text.x = element_text(size = 8),
    axis.title = element_text(size = 12)
  )

# This is Figure J12
ggsave("figures_appendix/figure_j_12.eps", width = 7, height = 7, units = "in")

# For each baseline topic of interest, show the most similar topic of the model
# without left-right position. Pairs with other baseline topics (NA in
# robustness_test, hence NA in comptext) are excluded so that the maximum is
# always a cross-model match.
tstat2_pw %>%
  filter(!is.na(robustness_test)) %>%
  filter(basetext %in% c(12, 13, 22, 32, 38, 40)) %>%
  group_by(basetext) %>%
  filter(value == max(value)) %>%
  ungroup()

# Test: Is there a new citizen-related topic?
# The rows of the model without left-right position carry the label "lr" in
# all_words, so that is the label to filter on.
all_words %>%
  filter(model == "lr") %>%
  filter(grepl("vote|citizen|public", complete))

#### ...estimateEffect ####

# Load the cached effect estimates for topics 12, 13, 22, 32, 38 and 40 unless
# run_r6_lr_estimateEffect asks for a fresh run with n_simulation simulations.
if (run_r6_lr_estimateEffect != "yes") {
  load(file = "generated_data/prep_council_r6_lr.RData")
} else {
  prep_council_lr <- estimateEffect_cluster(
    c(12, 13, 22, 32, 38, 40) ~ gov_eu_supporter * image_lag6m_scaled +
      part_of_speech +
      eu_receipts_gdp_scaled + budget_any + unanimity_any +
      unemployment_scaled + inflation_scaled + north_south +
      Negotiation_Stage + Council_Config_final,
    stm_out_40_left_right_model,
    nsims = n_simulation,
    meta = docvars(data_dfm_r6),
    uncertainty = "Global"
  )

  save(prep_council_lr, file = "generated_data/prep_council_r6_lr.RData")
}

#### ...regressions ####

# Docvars of the test-6 sample as a plain data frame.
# NOTE(review): data.sum is not referenced again in this section; presumably the
# helpers sourced from 99_functions.R read it from the global environment -
# confirm before removing.
data.sum <- data.frame(docvars(data_dfm_r6))

if (run_r6_lr_regressions == "yes") {

  # One interaction regression per topic of interest, each on prep_council_lr
  # with image_lag6m_scaled moderated by gov_eu_supporter.
  tab_12 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled", var_mod = "gov_eu_supporter",
    topic = 12, estimate_object = prep_council_lr
  )
  tab_13 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled", var_mod = "gov_eu_supporter",
    topic = 13, estimate_object = prep_council_lr
  )
  tab_22 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled", var_mod = "gov_eu_supporter",
    topic = 22, estimate_object = prep_council_lr
  )
  tab_32 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled", var_mod = "gov_eu_supporter",
    topic = 32, estimate_object = prep_council_lr
  )
  tab_38 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled", var_mod = "gov_eu_supporter",
    topic = 38, estimate_object = prep_council_lr
  )
  tab_40 <- calc_interaction_regression_stm(
    var_1 = "image_lag6m_scaled", var_mod = "gov_eu_supporter",
    topic = 40, estimate_object = prep_council_lr
  )

  # Stack the six tables in topic order. Each topic label is repeated twice,
  # i.e. the labelling assumes two rows per topic table. Then add 1.96 * SE
  # bounds and readable names for the moderator levels.
  results_all_left_right <- bind_rows(
    tab_12, tab_13, tab_22, tab_32, tab_38, tab_40
  ) %>%
    mutate(
      Topic_name = rep(
        c(
          "Delaying agreement",
          "Formulating a demand",
          "Supporting the compromise",
          "More technical-level discussion needed",
          "Cautious language",
          "Raising a concern"
        ),
        each = 2
      ),
      upper = Estimate + 1.96 * Std.Error,
      lower = Estimate - 1.96 * Std.Error,
      moderator_level = recode(
        moderator_level,
        "baseline" = "Pro-EU Government",
        "gov_eu_supporterEurosceptic Government:image_lag6m_scaled" = "Eurosceptic Government"
      )
    )

  # Store the results under the name used by the table and figures below.
  results_all_r6 <- results_all_left_right
  save(results_all_r6, file = "generated_data/regression_results_r6.RData")
} else {
  print("Loading saved regression interaction")
  load(file = "generated_data/regression_results_r6.RData")
}

# This is Table J7: the regression results without the interval bounds, with
# presentation column names, printed as an xtable with five digits.
results_all_r6 %>%
  select(-upper, -lower) %>%
  dplyr::rename(
    `Government Ideology` = moderator_level,
    `Topic Label` = Topic_name,
    `t-value` = tval,
    `p-value` = p
  ) %>%
  xtable::xtable(digits = 5)

# Coefficient plot for the "Pro-EU Government" rows of results_all_r6: point
# estimates with lower/upper bounds per topic, faceted by hypothesis.
europhiles_r6 <- results_all_r6 %>%
  dplyr::rename(
    term = Topic_name,
    model = moderator_level,
    estimate = Estimate,
    std.error = Std.Error
  ) %>%
  # Assign each topic label to the hypothesis it belongs to.
  mutate(feature = as.factor(case_when(
    term %in% c("Supporting the compromise") ~ "Hypothesis 2",
    term %in% c(
      "Delaying agreement",
      "More technical-level discussion needed",
      "Cautious language"
    ) ~ "Hypothesis 3",
    term %in% c("Formulating a demand", "Raising a concern") ~ "Hypothesis 4"
  ))) %>%
  filter(model == "Pro-EU Government") %>%
  ggplot(aes(
    x = estimate, xmin = lower, xmax = upper,
    y = term, color = model
  )) +
  geom_point() +
  geom_errorbarh(height = 0) +
  geom_vline(xintercept = 0, lty = "dashed") +
  facet_grid(feature ~ model, scales = "free_y", space = "free") +
  scale_color_manual(values = c("black", "black")) +
  labs(
    x = "Effect of public image on expected topic proportion",
    y = "",
    title = ""
  ) +
  theme_bw() +
  theme_interaction

# Coefficient plot for the "Eurosceptic Government" rows of results_all_r6:
# point estimates with lower/upper bounds per topic, faceted by hypothesis.
eurosceptics_r6 <- results_all_r6 %>%
  dplyr::rename(
    term = Topic_name,
    model = moderator_level,
    estimate = Estimate,
    std.error = Std.Error
  ) %>%
  # Assign each topic label to the hypothesis it belongs to.
  mutate(feature = as.factor(case_when(
    term %in% c("Supporting the compromise") ~ "Hypothesis 2",
    term %in% c(
      "Delaying agreement",
      "More technical-level discussion needed",
      "Cautious language"
    ) ~ "Hypothesis 3",
    term %in% c("Formulating a demand", "Raising a concern") ~ "Hypothesis 4"
  ))) %>%
  filter(model == "Eurosceptic Government") %>%
  ggplot(aes(
    x = estimate, xmin = lower, xmax = upper,
    y = term, color = model
  )) +
  geom_point() +
  geom_errorbarh(height = 0) +
  geom_vline(xintercept = 0, lty = "dashed") +
  facet_grid(feature ~ model, scales = "free_y", space = "free") +
  scale_color_manual(values = c("black", "black")) +
  labs(
    x = "Effect of public image on expected topic proportion",
    y = "",
    title = ""
  ) +
  theme_bw() +
  theme_interaction

# This is Figure J13: the Eurosceptic panel is placed above the Pro-EU panel in
# a single column; ggsave() then writes the most recently created plot to disk.
ggpubr::ggarrange(eurosceptics_r6, europhiles_r6, ncol = 1)
ggsave(
  filename = "figures_appendix/figure_j_13.eps",
  width = 6,
  height = 6,
  units = "in"
)

#### END ####
